
import { MixSegment, createMixSegment } from "./mixSegment"
import { readFile, createObj, Word, weightCompare_A, PTMap, createPTMap } from "./utility"


// Module-level state shared by every extractor returned from createKeywordExtractor.

// Segmenter used by extr to cut text into words; created on the first
// createKeywordExtractor call and reused afterwards.
let segment_: MixSegment
// Word -> IDF weight, filled by loadIdfDict.
let idfMap_: PTMap<number> = createPTMap()
// Mean of the loaded IDF values; extr uses it as the weight factor for words
// that have no (truthy) entry in idfMap_.
let idfAverage_: number = 0

// Words that extr leaves out of its result; filled by loadStopWordDict.
let stopWords_: string[] = []

/**
 * Handle returned by createKeywordExtractor.
 */
export interface KeywordExtractor {
    /**
     * Extracts keywords from `str`.
     *
     * @param str  Text to extract keywords from.
     * @param topN Optional cap on the number of keywords returned.
     * @returns The extracted keywords.
     */
    extr(str: string, topN?: number): string[]
}

/**
 * Returns a keyword extractor backed by module-level dictionaries.
 *
 * The dictionaries are loaded once: the first successful call builds the
 * segmenter and loads the IDF and stop-word files; every later call reuses
 * that state and ignores its path arguments.
 *
 * @param dictPath     Path of the segmentation dictionary (passed to createMixSegment).
 * @param hmmFilePath  Path of the HMM model file (passed to createMixSegment).
 * @param idfPath      Path of the IDF file (passed to loadIdfDict).
 * @param stopWordPath Path of the stop-word file (passed to loadStopWordDict).
 * @param userDict     Optional user dictionary path (passed to createMixSegment).
 * @returns An object exposing `extr`.
 * @throws Whatever createMixSegment, loadIdfDict or loadStopWordDict throw.
 */
export function createKeywordExtractor(dictPath: string, hmmFilePath: string, idfPath: string, stopWordPath: string, userDict = ""): KeywordExtractor {
    if (!segment_) {
        const segment = createMixSegment(dictPath, hmmFilePath, userDict)
        loadIdfDict(idfPath)
        loadStopWordDict(stopWordPath)
        // Publish the segmenter only after every dictionary has loaded.
        // `segment_` doubles as the "initialised" flag, so setting it earlier
        // would make a later call skip loading after a failed first attempt.
        segment_ = segment
    }

    return {
        extr
    }

}

/**
 * Extracts keywords from `str` by TF-IDF weight.
 *
 * The text is cut into words by `segment_`; stop words are dropped. Each
 * remaining distinct word gets weight = (number of occurrences) x IDF, where
 * IDF comes from `idfMap_`, or `idfAverage_` when the word has no truthy entry
 * there. The words are then ordered with `weightCompare_A`.
 *
 * @param str  Text to extract keywords from.
 * @param topN Optional cap on the number of keywords; omitted means "all".
 * @returns The keywords in the order produced by `weightCompare_A`; an empty
 *          array when the segmenter output does not cover the whole input.
 */
function extr(str: string, topN?: number): string[] {
    const words: string[] = segment_.cut(str)
    // Set lookup instead of a linear indexOf scan for every token.
    const stopWords = new Set(stopWords_)
    const wordmap = new Map<string, Word>()

    let offset = 0

    for (const word of words) {
        const start = offset
        offset += word.length

        if (stopWords.has(word)) {
            continue
        }

        const seen = wordmap.get(word)
        if (seen) {
            // Repeated word: accumulate term frequency and remember every position.
            seen.offsets.push(start)
            seen.weight += 1.0
        } else {
            const entry = createObj<Word>()
            entry.offsets = [start]
            entry.weight = 1.0
            wordmap.set(word, entry)
        }
    }

    // The word lengths must add up to the input length; otherwise the
    // segmentation is not trusted and no keywords are reported.
    if (offset !== str.length) {
        return []
    }

    const keys: Word[] = []

    wordmap.forEach((entry, word) => {
        const idf = idfMap_[word]
        entry.weight *= idf ? idf : idfAverage_
        entry.word = word
        keys.push(entry)
    })

    keys.sort(weightCompare_A)

    const ranked = topN !== undefined && topN < keys.length ? keys.slice(0, topN) : keys

    return ranked.map(entry => entry.word)
}

// Line-reader state shared by loadIdfDict, loadStopWordDict and nextLine.

// Lines of the data file currently being read.
let content: string[]
// The line most recently read by nextLine (trimmed when nextLine returned true).
let line = ""
// Index into `content` of the next line nextLine will read.
let i = 0

/**
 * Loads the IDF file into `idfMap_` and sets `idfAverage_` to the mean of the
 * loaded values.
 *
 * Each usable line holds a word and a number separated by whitespace. Lines
 * with any other shape, or with a non-numeric value, are skipped so that they
 * cannot turn the average into NaN. When no usable line exists the average
 * is 0.
 *
 * @param idfPath Path of the IDF file.
 * @throws The string "idfPath数据文件内容丢失" when the file cannot be read
 *         (thrown as a plain string, which is what existing callers receive).
 */
function loadIdfDict(idfPath: string) {

    try {
        content = readFile(idfPath).replace(/(\n\r|\n)/g, "\n").split("\n")

    } catch (e) {
        throw ("idfPath数据文件内容丢失")

    }

    // nextLine reads `content` through the shared cursor `i`; start this file
    // at its first line regardless of what was read before.
    i = 0

    let idfSum = 0
    let entryCount = 0

    while (nextLine()) {
        const fields = line.split(/\s+/)
        if (fields.length !== 2) {
            continue
        }

        const idf = Number(fields[1])
        if (!Number.isFinite(idf)) {
            continue
        }

        idfMap_[fields[0]] = idf
        idfSum += idf
        entryCount++

    }

    // Guard the division: an empty file would otherwise give 0 / 0 = NaN.
    idfAverage_ = entryCount > 0 ? idfSum / entryCount : 0
}


/**
 * Appends every usable line of the stop-word file to `stopWords_`.
 *
 * @param filePath Path of the stop-word file.
 * @throws The string "stopWordPath数据文件内容丢失" when the file cannot be read
 *         (thrown as a plain string, matching the other loader in this file).
 */
function loadStopWordDict(filePath: string) {
    try {
        content = readFile(filePath).replace(/(\n\r|\n)/g, "\n").split("\n")

    } catch (e) {
        throw ("stopWordPath数据文件内容丢失")

    }

    // nextLine reads `content` through the shared cursor `i`. Without this
    // rewind the cursor still points past the end of the previously read
    // file, and a shorter stop-word file would load nothing.
    i = 0

    while (nextLine()) {
        stopWords_.push(line)

    }

}


/**
 * Advances the shared cursor `i` through `content` to the next usable line.
 *
 * A usable line is non-empty after trimming and contains no "#". On success
 * the trimmed text is left in `line`. Blank lines are skipped rather than
 * treated as the end of the file.
 *
 * When the lines are exhausted the cursor is rewound to 0, so the next file
 * assigned to `content` is read from its first line.
 *
 * @returns true when `line` holds a usable line, false when `content` is exhausted.
 */
function nextLine(): boolean {

    while (i < content.length) {

        line = content[i].trim()
        i++

        if (!line) {
            continue
        }

        // NOTE(review): this skips any line that contains "#" anywhere, not
        // only lines that start with it — confirm that entries such as "C#"
        // are meant to be dropped.
        if (line.indexOf("#") !== -1) {
            continue
        }

        return true

    }

    i = 0

    return false
}